Organisation: Data Science Platform
Responsible: Juliana Assis ()

1 Introduction to data visualization

## Loading required package: ggplot2
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
## 
## Attaching package: 'reshape'
## The following objects are masked from 'package:reshape2':
## 
##     colsplit, melt, recast
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
## 
## Attaching package: 'ggh4x'
## The following object is masked from 'package:ggplot2':
## 
##     guide_axis_logticks
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:reshape':
## 
##     rename
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Loading required package: viridis
## Loading required package: viridisLite
## 
## ======================
## Welcome to heatmaply version 1.5.0
## 
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
## 
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags: 
##   https://stackoverflow.com/questions/tagged/heatmaply
## ======================
## [[1]]
## [1] "ggpubr"    "ggplot2"   "stats"     "graphics"  "grDevices" "utils"    
## [7] "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "grid"      "ggpubr"    "ggplot2"   "stats"     "graphics"  "grDevices"
##  [7] "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "tidyr"     "grid"      "ggpubr"    "ggplot2"   "stats"     "graphics" 
##  [7] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "reshape2"  "tidyr"     "grid"      "ggpubr"    "ggplot2"   "stats"    
##  [7] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "reshape"   "reshape2"  "tidyr"     "grid"      "ggpubr"    "ggplot2"  
##  [7] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [13] "base"     
## 
## [[6]]
##  [1] "ggrepel"   "reshape"   "reshape2"  "tidyr"     "grid"      "ggpubr"   
##  [7] "ggplot2"   "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [13] "methods"   "base"     
## 
## [[7]]
##  [1] "ggh4x"     "ggrepel"   "reshape"   "reshape2"  "tidyr"     "grid"     
##  [7] "ggpubr"    "ggplot2"   "stats"     "graphics"  "grDevices" "utils"    
## [13] "datasets"  "methods"   "base"     
## 
## [[8]]
##  [1] "pheatmap"  "ggh4x"     "ggrepel"   "reshape"   "reshape2"  "tidyr"    
##  [7] "grid"      "ggpubr"    "ggplot2"   "stats"     "graphics"  "grDevices"
## [13] "utils"     "datasets"  "methods"   "base"     
## 
## [[9]]
##  [1] "RColorBrewer" "pheatmap"     "ggh4x"        "ggrepel"      "reshape"     
##  [6] "reshape2"     "tidyr"        "grid"         "ggpubr"       "ggplot2"     
## [11] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [16] "methods"      "base"        
## 
## [[10]]
##  [1] "patchwork"    "RColorBrewer" "pheatmap"     "ggh4x"        "ggrepel"     
##  [6] "reshape"      "reshape2"     "tidyr"        "grid"         "ggpubr"      
## [11] "ggplot2"      "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "DT"           "patchwork"    "RColorBrewer" "pheatmap"     "ggh4x"       
##  [6] "ggrepel"      "reshape"      "reshape2"     "tidyr"        "grid"        
## [11] "ggpubr"       "ggplot2"      "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[12]]
##  [1] "kableExtra"   "DT"           "patchwork"    "RColorBrewer" "pheatmap"    
##  [6] "ggh4x"        "ggrepel"      "reshape"      "reshape2"     "tidyr"       
## [11] "grid"         "ggpubr"       "ggplot2"      "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[13]]
##  [1] "plotly"       "kableExtra"   "DT"           "patchwork"    "RColorBrewer"
##  [6] "pheatmap"     "ggh4x"        "ggrepel"      "reshape"      "reshape2"    
## [11] "tidyr"        "grid"         "ggpubr"       "ggplot2"      "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[14]]
##  [1] "bookdown"     "plotly"       "kableExtra"   "DT"           "patchwork"   
##  [6] "RColorBrewer" "pheatmap"     "ggh4x"        "ggrepel"      "reshape"     
## [11] "reshape2"     "tidyr"        "grid"         "ggpubr"       "ggplot2"     
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[15]]
##  [1] "heatmaply"    "viridis"      "viridisLite"  "bookdown"     "plotly"      
##  [6] "kableExtra"   "DT"           "patchwork"    "RColorBrewer" "pheatmap"    
## [11] "ggh4x"        "ggrepel"      "reshape"      "reshape2"     "tidyr"       
## [16] "grid"         "ggpubr"       "ggplot2"      "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"
## Bioconductor version '3.19' is out-of-date; the current release version '3.20'
##   is available with R version '4.4'; see https://bioconductor.org/install
## Bioconductor version 3.19 (BiocManager 1.30.25), R 4.4.1 (2024-06-14)
## Warning: package(s) not installed when version(s) same as or greater than current; use
##   `force = TRUE` to re-install: 'ComplexHeatmap'
## Old packages: 'bookdown', 'clue', 'corrplot', 'curl', 'data.table',
##   'dendextend', 'doBy', 'emmeans', 'evaluate', 'fs', 'GenomicRanges', 'gert',
##   'gtable', 'Hmisc', 'httr2', 'igraph', 'knitr', 'Matrix', 'mvtnorm',
##   'pkgbuild', 'ps', 'quantreg', 'R.oo', 'Rcpp', 'renv', 'rmarkdown',
##   'rstudioapi', 'tinytex', 'waldo', 'withr', 'xfun'

2 Downloading the files and creating a directory called data/raw

3 Loading the first file

# Load the TSV files
# The alpha-diversity table is tab-separated; its first column is used as the
# row names. TRUE is spelled out because the shorthand T is an ordinary
# variable that can be reassigned.
alpha_info_tab <- read.table(
  "data/raw/01_Alpha_Diversity.tsv",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  check.names = TRUE
)
#ord_dataframe <- read.table("data/raw/02_ord_DataFrame.tsv", header = TRUE, sep = "\t")
#heatmap_exp1 <- read.table("data/raw/03_HeatMap_Exp1_DCs.tsv", header = TRUE, sep = "\t")

# Load the RDS file
#heatmap_data <- readRDS("data/raw/04_HeatMap.rds")

# Load the CSV file
#data_csv <- read.csv("data/raw/data.csv")

4 Summary: Sample Information

5 Filtering the data using the dplyr package

# Load dplyr
# Attach dplyr explicitly: filter() and %>% below, and the other dplyr verbs
# used in later sections (bind_rows(), recode(), desc()), come from it.
library(dplyr)

# Filter the data for Study == "Exp1"
alpha_info_tab_filtered <- alpha_info_tab %>%
  filter(Study == "Exp1")

# View the filtered data
alpha_info_tab_filtered

6 BoxPlot

#library(dplyr)
#library(ggplot2)

# Boxplot of the Observed values per Compartment, using only the rows whose
# Study is "Exp1". Each box is filled according to its compartment.
ggplot(
  data = filter(alpha_info_tab, Study == "Exp1"),
  mapping = aes(x = Compartment, y = Observed, fill = Compartment)
) +
  geom_boxplot() +
  #geom_jitter(width = 0.2) +  # Optional: overlay the individual data points
  labs(
    title = "Boxplot of Observed Values in Exp1 by Compartment",
    x = "Compartment",
    y = "Observed"
  ) +
  theme_pubr(border = TRUE) #+

  #scale_fill_manual(values = cols_compartment)  # Optional: custom fill colours

7 Sort the compartment and add color

# Option 1: a named palette written by hand (practical for a handful of groups)
cols_compartment <- c("Fecal" = "#D46C4E", "AC" = "#77A515", "TC" = "#264D59", "DC" = "#43978D")


# Option 2: derive the palette from the data with RColorBrewer
# Get the unique compartments
unique_compartments <- unique(alpha_info_tab_filtered$Compartment)
n_compartments <- length(unique_compartments)

# brewer.pal() returns between 3 and 12 colours for "Set3": asking for fewer
# than 3 still yields 3 colours, asking for more than 12 yields only 12.
# Either case would give setNames() a colour vector whose length differs from
# the number of names, so request a valid size and then trim or interpolate
# to exactly one colour per compartment.
set3_colors <- RColorBrewer::brewer.pal(min(max(n_compartments, 3), 12), "Set3")
if (n_compartments > length(set3_colors)) {
  set3_colors <- grDevices::colorRampPalette(set3_colors)(n_compartments)
}

cols_compartment_auto <- setNames(
  set3_colors[seq_len(n_compartments)],
  unique_compartments
)

8 Plotting

# Same Exp1 boxplot as before, but with the compartments shown in a fixed
# order (Fecal, AC, TC, DC) and filled with the hand-written palette.
ggplot(
  data = alpha_info_tab %>%
    filter(Study == "Exp1") %>%
    mutate(Compartment = factor(Compartment, levels = c("Fecal", "AC", "TC", "DC"))),
  mapping = aes(x = Compartment, y = Observed, fill = Compartment)
) +
  geom_boxplot() +
  scale_fill_manual(values = cols_compartment) +
  labs(
    title = "Boxplot of Observed Values in Exp1 by Compartment",
    x = "Compartment",
    y = "Observed"
  ) +
  theme_pubr(border = TRUE)

  #scale_fill_manual(values = cols_compartment_auto)  # Alternative: the RColorBrewer palette
# Saving
#ggsave("/Users/flb202/Documents/KU/BRIC/Projects/Arnes_Kristina/Results/03_PCAPlot.png", PCAPlot, width = 8.0, height = 5.5)

9 facet_wrap() function, mutate

#library(dplyr)
#library(ggplot2)

# Restrict to Exp1 once, up front. Every piece bound together below must be
# taken from this subset: taking the pieces from the full alpha_info_tab
# would let samples from other studies into a plot that is titled "Exp1".
exp1_alpha <- alpha_info_tab %>%
  filter(Study == "Exp1")

# Build a `Units` column used for faceting. The Fecal rows are duplicated so
# that they appear in both the Unit1 and the Unit2 panel; all other
# compartments keep their own Unit.
test <- bind_rows(
  exp1_alpha %>%
    filter(Compartment == "Fecal") %>%
    mutate(Units = "Unit1"),
  exp1_alpha %>%
    filter(Compartment == "Fecal") %>%
    mutate(Units = "Unit2"),
  exp1_alpha %>%
    filter(Compartment != "Fecal") %>%
    mutate(Units = as.character(Unit))  # Keep original Unit for other compartments
) %>%
  mutate(Compartment = factor(Compartment, levels = c("Fecal", "AC", "TC", "DC"))) %>%
  # Remove rows with NA in Compartment or Units columns
  filter(!is.na(Compartment) & !is.na(Units)) %>%
  ggplot(aes(x = Compartment, y = Observed, fill = Compartment)) +
  geom_boxplot() +  # Create the boxplot
  theme_pubr(border = TRUE) +  # Apply the theme
  labs(x = "Compartment", y = "Observed") +
  ggtitle("Boxplot of Observed Values in Exp1 by Compartment and Units") +
  scale_fill_manual(values = cols_compartment) +  # Apply custom colors
  facet_wrap(~ Units, scales = "free_x")  # One panel per unit
test

9.1 Longitudinal

# Extra

# Define the y-axis scale
#scales <- scale_y_continuous(limits = c(0, 129))

# Plotting: Observed and Shannon over the days, one facet row per metric,
# one line per Study x Compartment_Unit combination.
alpha_info_tab %>%
  filter(Study == "Exp1") %>%  # Filter for Exp1
  # Reshape to long format: one row per sample and metric.
  # pivot_longer() replaces the superseded gather().
  pivot_longer(
    cols = c("Observed", "Shannon"),
    names_to = "metric",
    values_to = "value"
  ) %>%
  mutate(
    metric = recode(metric, "Observed" = "Richness")  # Display name; "Shannon" is kept as is
  ) %>%

  ggplot(aes(x = Day, y = value, color = Compartment, group = interaction(Study, Compartment_Unit))) +

  # Add vertical lines for day intervals
  geom_vline(xintercept = 1:23, linetype = 'solid', colour = "grey", alpha = 0.3) +

  # Plot points and lines
  geom_point(size = 4) +
  geom_line(aes(linetype = Unit)) +

  # Customize plot theme
  theme_pubr(border = TRUE) +
  theme(
    axis.text.x = element_text(size = 8, hjust = 0.5),
    axis.text.y = element_text(size = 8, hjust = 1),
    legend.position = "top"
  ) +

  # Custom color and line types
  scale_colour_manual(values = cols_compartment) +
  scale_linetype_manual(values = c("dotted", "solid", "dashed", "longdash")) +

  # Define x-axis labels
  scale_x_discrete(limits = c("Fecal", as.character(1:23))) +

  # Add labels for axes
  labs(x = "", y = "") +

  # Facet the plot by metric
  facet_grid(rows = vars(metric), scales = "free_y", space = "free_x") #+

  # Apply custom y-axis scale in facets (no labels, just limits)
  # facetted_pos_scales(y = scales)

Next Analysis

10 Beta Diversity, PCA

# Load the ordination table (tab-separated; the first column is used as the
# row names). TRUE is spelled out because the shorthand T can be reassigned.
ord_DataFrame <- read.table(
  "data/raw/02_ord_DataFrame.tsv",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  check.names = TRUE
)

# Show the table as a striped, hover-highlighted HTML table
ord_DataFrame %>%
  knitr::kable() %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
X45.7. X19.4. Study Unit Compartment Compartment_Unit Day Triplicate Main_Analysis
Exp1_F2 -0.0700905 -0.3706615 Exp1 Fecal Fecal Fecal 0 A2 YES
Exp1_0A1 -0.0966809 -0.4513312 Exp1 Unit1 AC AC1 1 NO YES
Exp1_0A2 -0.0937636 -0.4653644 Exp1 Unit2 AC AC2 1 NO YES
Exp1_0D1 -0.0684402 -0.4998071 Exp1 Unit1 DC DC1 1 NO YES
Exp1_0D2 -0.0667823 -0.4909481 Exp1 Unit2 DC DC2 1 NO YES
Exp1_0T1 -0.0657618 -0.5205511 Exp1 Unit1 TC TC1 1 NO YES
Exp1_0T2 -0.0683041 -0.5175081 Exp1 Unit2 TC TC2 1 NO YES
Exp1_12A1_2 0.2511365 0.1368428 Exp1 Unit1 AC AC1 12 A2 YES
Exp1_12A2_2 0.2469539 0.1242978 Exp1 Unit2 AC AC2 12 A2 YES
Exp1_12D1_2 0.2474297 0.0501418 Exp1 Unit1 DC DC1 12 A2 YES
Exp1_12D2_2 0.2456813 0.0240495 Exp1 Unit2 DC DC2 12 A2 YES
Exp1_12T1_2 0.2700372 0.0961836 Exp1 Unit1 TC TC1 12 A2 YES
Exp1_12T2_2 0.2668607 0.0869340 Exp1 Unit2 TC TC2 12 A2 YES
Exp1_13A1 0.2525283 0.1310443 Exp1 Unit1 AC AC1 13 NO YES
Exp1_13A2 0.2501388 0.1338156 Exp1 Unit2 AC AC2 13 NO YES
Exp1_13D1 0.2446710 0.0737441 Exp1 Unit1 DC DC1 13 NO YES
Exp1_13D2 0.2480532 0.0465521 Exp1 Unit2 DC DC2 13 NO YES
Exp1_13T1 0.2668749 0.1237862 Exp1 Unit1 TC TC1 13 NO YES
Exp1_13T2 0.2636917 0.0934710 Exp1 Unit2 TC TC2 13 NO YES
Exp1_15A1 0.2529885 0.1382319 Exp1 Unit1 AC AC1 15 NO YES
Exp1_15A2 0.2574696 0.1464790 Exp1 Unit2 AC AC2 15 NO YES
Exp1_15D1 0.2225984 0.0710222 Exp1 Unit1 DC DC1 15 NO YES
Exp1_15D2 0.2452289 0.0477370 Exp1 Unit2 DC DC2 15 NO YES
Exp1_15T1 0.2572078 0.1080404 Exp1 Unit1 TC TC1 15 NO YES
Exp1_15T2 0.2601346 0.1038236 Exp1 Unit2 TC TC2 15 NO YES
Exp1_16A1 0.2543099 0.1239164 Exp1 Unit1 AC AC1 16 NO YES
Exp1_16A2 0.2579028 0.1278760 Exp1 Unit2 AC AC2 16 NO YES
Exp1_16D1 0.2231274 0.0715852 Exp1 Unit1 DC DC1 16 NO YES
Exp1_16D2 0.2262388 0.0555737 Exp1 Unit2 DC DC2 16 NO YES
Exp1_16T1 0.2553281 0.1097970 Exp1 Unit1 TC TC1 16 NO YES
Exp1_16T2 0.2555997 0.1028882 Exp1 Unit2 TC TC2 16 NO YES
Exp1_17A1_2 0.2571796 0.1348213 Exp1 Unit1 AC AC1 17 A2 YES
Exp1_17A2_2 0.2567410 0.1364964 Exp1 Unit2 AC AC2 17 A2 YES
Exp1_17D1_2 0.2264776 0.0695303 Exp1 Unit1 DC DC1 17 A2 YES
Exp1_17D2_2 0.2288960 0.0455693 Exp1 Unit2 DC DC2 17 A2 YES
Exp1_17T1_2 0.2587796 0.0884254 Exp1 Unit1 TC TC1 17 A2 YES
Exp1_17T2_2 0.2601908 0.1170307 Exp1 Unit2 TC TC2 17 A2 YES
Exp1_20A1 0.2592540 0.1419959 Exp1 Unit1 AC AC1 20 NO YES
Exp1_20A2 0.2517814 0.1363950 Exp1 Unit2 AC AC2 20 NO YES
Exp1_20D1 0.2039536 0.0762874 Exp1 Unit1 DC DC1 20 NO YES
Exp1_20D2 0.2156043 0.0351714 Exp1 Unit2 DC DC2 20 NO YES
Exp1_20T1 0.2424306 0.1104838 Exp1 Unit1 TC TC1 20 NO YES
Exp1_20T2 0.2484694 0.1037655 Exp1 Unit2 TC TC2 20 NO YES
Exp1_21A1 0.2536555 0.1174449 Exp1 Unit1 AC AC1 21 NO YES
Exp1_21A2 0.2604426 0.1301304 Exp1 Unit2 AC AC2 21 NO YES
Exp1_21D1 0.2123776 0.0742765 Exp1 Unit1 DC DC1 21 NO YES
Exp1_21D2 0.2081537 0.0575063 Exp1 Unit2 DC DC2 21 NO YES
Exp1_21T1 0.2398925 0.1198696 Exp1 Unit1 TC TC1 21 NO YES
Exp1_21T2 0.2491966 0.1015611 Exp1 Unit2 TC TC2 21 NO YES
Exp1_23A1_2 0.2505437 0.1136735 Exp1 Unit1 AC AC1 22 A2 YES
Exp1_23A2_2 0.2564423 0.1163847 Exp1 Unit2 AC AC2 22 A2 YES
Exp1_23D1_2 0.2128353 0.0555137 Exp1 Unit1 DC DC1 22 A2 YES
Exp1_23D2_2 0.2252923 0.0451506 Exp1 Unit2 DC DC2 22 A2 YES
Exp1_23T1_2 0.2460740 0.1112777 Exp1 Unit1 TC TC1 22 A2 YES
Exp1_23T2_2 0.2509772 0.1037091 Exp1 Unit2 TC TC2 22 A2 YES
Exp1_2A1_2 -0.0542209 -0.4860391 Exp1 Unit1 AC AC1 2 A2 YES
Exp1_2A2_2 -0.0616063 -0.4857363 Exp1 Unit2 AC AC2 2 A2 YES
Exp1_2D1_2 -0.0537886 -0.5197329 Exp1 Unit1 DC DC1 2 A2 YES
Exp1_2D2_2 -0.0478566 -0.5097330 Exp1 Unit2 DC DC2 2 A2 YES
Exp1_2T1_2 -0.0421125 -0.5147059 Exp1 Unit1 TC TC1 2 A2 YES
Exp1_2T2_2 -0.0471718 -0.5119103 Exp1 Unit2 TC TC2 2 A2 YES
Exp1_5A1 0.2458645 0.0577689 Exp1 Unit1 AC AC1 5 NO YES
Exp1_5A2 0.2108324 -0.0430012 Exp1 Unit2 AC AC2 5 NO YES
Exp1_5D1 0.0931342 -0.2867966 Exp1 Unit1 DC DC1 5 NO YES
Exp1_5D2 0.0600850 -0.3152770 Exp1 Unit2 DC DC2 5 NO YES
Exp1_5T1 0.1995305 -0.1289747 Exp1 Unit1 TC TC1 5 NO YES
Exp1_5T2 0.1359410 -0.2232990 Exp1 Unit2 TC TC2 5 NO YES
Exp1_6A1 0.2356668 0.0865840 Exp1 Unit1 AC AC1 6 NO YES
Exp1_6A2 0.2433493 0.0717020 Exp1 Unit2 AC AC2 6 NO YES
Exp1_6D1 0.1794178 -0.1574569 Exp1 Unit1 DC DC1 6 NO YES
Exp1_6D2 0.1493715 -0.1865668 Exp1 Unit2 DC DC2 6 NO YES
Exp1_6T1 0.2425770 -0.0131605 Exp1 Unit1 TC TC1 6 NO YES
Exp1_6T2 0.2225351 -0.0764632 Exp1 Unit2 TC TC2 6 NO YES
Exp1_7A1_2 0.2410020 0.0642033 Exp1 Unit1 AC AC1 7 A2 YES
Exp1_7A2_2 0.2275324 0.0242377 Exp1 Unit2 AC AC2 7 A2 YES
Exp1_7D1_2 0.2167371 -0.0834913 Exp1 Unit1 DC DC1 7 A2 YES
Exp1_7D2_2 0.1987917 -0.1155900 Exp1 Unit2 DC DC2 7 A2 YES
Exp1_7T1_2 0.2514338 0.0032591 Exp1 Unit1 TC TC1 7 A2 YES
Exp1_7T2_2 0.2372218 -0.0457467 Exp1 Unit2 TC TC2 7 A2 YES
Exp2_101 -0.3946628 -0.0125345 Exp2 Unit1 AC AC1 1 NO YES
Exp2_107 -0.4703817 0.1407767 Exp2 Unit1 AC AC1 21 NO YES
Exp2_109 -0.3302521 -0.0917644 Exp2 Unit1 DC DC1 1 NO YES
Exp2_112 -0.5030497 0.1166510 Exp2 Unit1 AC AC1 7 NO YES
Exp2_113 -0.4715645 0.0966926 Exp2 Unit1 DC DC1 5 NO YES
Exp2_114 -0.3844721 0.0978012 Exp2 Unit1 DC DC1 21 NO YES
Exp2_125 -0.4310609 0.1020624 Exp2 Unit1 DC DC1 13 NO YES
Exp2_129 -0.4229217 0.0959367 Exp2 Unit1 DC DC1 15 NO YES
Exp2_15 -0.4717850 0.1129392 Exp2 Unit1 DC DC1 7 NO YES
Exp2_18 -0.3799263 0.1094529 Exp2 Unit1 DC DC1 25 NO YES
Exp2_20 -0.4298488 0.1078088 Exp2 Unit1 DC DC1 11 NO YES
Exp2_22 -0.4982336 0.1101436 Exp2 Unit1 AC AC1 29 NO YES
Exp2_25 -0.4088843 0.0363510 Exp2 Unit1 DC DC1 3 NO YES
Exp2_26 -0.4342018 0.1012039 Exp2 Unit1 DC DC1 9 NO YES
Exp2_3 -0.4818679 0.1309512 Exp2 Unit1 AC AC1 25 NO YES
Exp2_36 -0.3682212 0.1018673 Exp2 Unit1 DC DC1 29 NO YES
Exp2_40 -0.4954695 0.1195638 Exp2 Unit1 AC AC1 13 NO YES
Exp2_43 -0.4869123 0.1117271 Exp2 Unit1 AC AC1 31 NO YES
Exp2_50 -0.4907559 0.1237128 Exp2 Unit1 AC AC1 19 NO YES
Exp2_51 -0.4525669 0.0620748 Exp2 Unit1 AC AC1 3 NO YES
Exp2_58 -0.4923113 0.1170102 Exp2 Unit1 AC AC1 17 NO YES
Exp2_60 -0.4914208 0.1211465 Exp2 Unit1 AC AC1 11 NO YES
Exp2_61 -0.3504377 0.0826640 Exp2 Unit1 DC DC1 23 NO YES
Exp2_62 -0.4981926 0.1126525 Exp2 Unit1 AC AC1 9 NO YES
Exp2_68 -0.3678534 0.0994831 Exp2 Unit1 DC DC1 31 NO YES
Exp2_7 -0.3129596 -0.0551986 Exp2 Fecal Fecal Fecal 0 NO YES
Exp2_70 -0.4418437 0.1023092 Exp2 Unit1 DC DC1 17 NO YES
Exp2_77 -0.4270004 0.0976050 Exp2 Unit1 DC DC1 19 NO YES
Exp2_78 -0.5025657 0.1174558 Exp2 Unit1 AC AC1 27 NO YES
Exp2_8 -0.3910134 0.1084368 Exp2 Unit1 DC DC1 27 NO YES
Exp2_83 -0.4604354 0.1398532 Exp2 Unit1 AC AC1 23 NO YES
Exp2_88 -0.4943289 0.1122258 Exp2 Unit1 AC AC1 15 NO YES
Exp2_99 -0.5148743 0.1127323 Exp2 Unit1 AC AC1 5 NO YES

10.0.1 Beta Diversity

# First look at the ordination: the two axis columns plotted against each
# other, colour by Compartment_Unit and point shape by Unit.
ggplot(
  data = ord_DataFrame,
  mapping = aes(
    x = X45.7.,
    y = X19.4.,
    color = Compartment_Unit,
    shape = Unit
  )
) +
  geom_point(size = 5) #+

  # Optional refinements, applied in the next section:
  #scale_shape_manual(values = c(18, 19, 17)) +
  #theme_pubr(border = TRUE) +
  #coord_fixed(ratio = 1)

10.1 Adding color and more nuances

unique(ord_DataFrame$Compartment_Unit)
## [1] "Fecal" "AC1"   "AC2"   "DC1"   "DC2"   "TC1"   "TC2"
# One colour per Compartment_Unit value; the two units of a compartment
# share the same colour and are told apart by point shape instead.
compartment_unit_col <- c(
  "Fecal" = "#264D59",
  "AC1" = "#77A515", "TC1" = "#D46C4E", "DC1" = "#43978D",
  "AC2" = "#77A515", "TC2" = "#D46C4E", "DC2" = "#43978D"
)

ordplot2 <- ggplot(
  data = ord_DataFrame,
  mapping = aes(x = X45.7., y = X19.4., color = Compartment_Unit, shape = Unit)
) +
  # Dashed reference lines through the origin, drawn underneath the points
  geom_hline(yintercept = 0, linetype = 'dashed', alpha = 0.3) +
  geom_vline(xintercept = 0, linetype = 'dashed', alpha = 0.3) +
  geom_point(size = 5) +
  #geom_text_repel(aes(label = Study), nudge_x = 0.06, size = 3.0, segment.alpha = 0.5) +
  # Manual colours and point shapes
  scale_color_manual(values = compartment_unit_col) +
  scale_shape_manual(values = c(18, 19, 17)) +
  # Keep aspect ratio 1:1
  coord_fixed(ratio = 1) +
  labs(x = "PCo1 [45.7%]", y = "PCo2 [19.4%]", color = "Compartment_Unit", shape = "Unit") +
  # Base theme first, then the text-size and legend overrides
  theme_pubr(border = TRUE) +
  theme(
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18),
    axis.text = element_text(size = 14),
    axis.text.x = element_text(size = 12, hjust = 0.5),
    strip.text.x = element_text(size = 20, face = "bold"),
    legend.position = "bottom", #top, null etc
    legend.title = element_text(size = 0),
    legend.text = element_text(size = 14)
  ) #+
  #annotate("text", x = min(ord_DataFrame$X45.7.), y = min(ord_DataFrame$X19.4.), 
           #label = "Exp2", hjust = 0.1, vjust = 0.4, size = 5, color = "black")

# To view the plot
print(ordplot2)

11 Heatmap

12 HeatMap and BarPlot

Step 1: Filter the data: Remove ASVs that do not appear more than 3 times in more than 30% of the samples
Step 2: Transform the data: Relative Abundance

12.0.1 HeatMap

# Load the heatmap table (tab-separated; the first column is used as the row
# names). TRUE is spelled out because the shorthand T can be reassigned.
heat_df <- read.table(
  "data/raw/03_HeatMap_Exp1_DCs.tsv",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  check.names = TRUE
)
class(heat_df)
## [1] "data.frame"
# The heatmap functions used below work on matrices, so keep a matrix copy
heat_mat <- as.matrix(heat_df)
class(heat_mat)
## [1] "matrix" "array"

13 Filter top 20

# Step 1: Score every row by its total across all columns
# (`rowMeans` would be an alternative score)
heat_df[["Total"]] <- rowSums(heat_df)

# Step 2: Order the rows from highest to lowest Total, keep the first 20,
# and drop the helper column again
top20_data <- select(
  slice(
    arrange(heat_df, desc(Total)),
    seq_len(20)
  ),
  -Total
)

# Step 3: Convert to matrix for ComplexHeatmap
data_matrix <- as.matrix(top20_data)
#data_matrix <- as.matrix(heat_df)

14 Plot

library(ComplexHeatmap)
library(circlize)  # Provides colorRamp2()
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
# Heatmap of the top-20 matrix with rows and columns both clustered.
# The colour scale runs blue -> white -> red, anchored at the minimum,
# median and maximum of the matrix values.
Heatmap(
  data_matrix,
  name = "Abundance",
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_row_names = TRUE,
  show_column_names = TRUE,
  col = colorRamp2(
    breaks = c(min(data_matrix), median(data_matrix), max(data_matrix)),
    colors = c("blue", "white", "red")
  )
)

14.1 more

# Define sample compartments
ncol(data_matrix)
## [1] 13
# Create Compartment labels for the samples, repeating "DC1" for all columns in data_matrix
compartment <- rep("DC1", ncol(data_matrix))

# Check if the lengths match
length(compartment) == ncol(data_matrix)  # Should return TRUE
## [1] TRUE
# Colors for compartments
compartment_colors <- c("DC1" = "#43978D")


# Create the column annotation: one coloured cell per sample, with the
# label -> colour mapping passed through `col`
sample_annotation <- HeatmapAnnotation(
    Compartment = compartment,
    col = list(Compartment = compartment_colors)
)

# Create the heatmap with the annotation.
# The annotation object built above is passed directly instead of building a
# second one with anno_simple(): it was otherwise never used, and
# anno_simple() annotations do not get a legend.
Heatmap(
  data_matrix,
  name = "Abundance",
  col = colorRamp2(
    c(min(data_matrix), median(data_matrix), max(data_matrix)),  # Color scale points
    c("blue", "white", "red")  # Colors corresponding to min, median, and max values
  ),
  show_row_names = TRUE,       # Show row names
  show_column_names = TRUE,     # Show column names
  cluster_rows = TRUE,          # Cluster rows
  cluster_columns = TRUE,       # Cluster columns
  top_annotation = sample_annotation
)

15 Pheatmap

## Change colors
colsHeat<- c("#F7F7F7", "#92C5DE", "#0571B0", "#F4A582", "#CA0020")

# Create an annotation dataframe.
# pheatmap matches the rows of `annotation_col` to the matrix columns by
# name, so the row names must be the column names of the matrix; with the
# default row names 1..n nothing matches.
annotation_df <- data.frame(
  Compartment = compartment,
  row.names = colnames(data_matrix)
)

# Define annotation colors
annotation_colors <- list(Compartment = c("DC1" = "#43978D"))

# Plot heatmap using pheatmap: rows are clustered, columns keep their order
pheatmap(data_matrix,
         cluster_cols = FALSE,
         cluster_rows = TRUE,
         #scale = "column",
         #gaps_row = 5, 
         clustering_distance_rows = "euclidean",
         clustering_distance_cols  = "euclidean",
         annotation_colors = annotation_colors, 
         annotation_col = annotation_df,  
         show_colnames = TRUE,
         color = colorRampPalette(c(colsHeat))(50),
         border_color = "#f8edeb",
         display_numbers = FALSE)

16 Sort and “normalization”

The code is performing a column-wise normalization on data_matrix by dividing each element in a column by the mean of that column.

Purpose of the code: the result, stored in data, is a transformed version of data_matrix in which each column has been scaled so that its average value is 1. This transformation is often used to normalize the data across columns, to account for differences in scale or to control for sample-specific variation in abundance data.

# Row-wise normalization
# (note: apply() over rows returns the result transposed, so wrap it in t()
# if this variant is used)
#data <- apply(data_matrix, 1, function(x) { x / mean(x) })

# Column-wise normalization: divide every value by the mean of its column
data <- apply(data_matrix, 2, function(x) { x / mean(x) })

# Different normalization
#sequencing_depth <- 20000
#read_counts_matrix <- data_matrix * sequencing_depth
# Step 2: Log-normalize each value in the matrix
#data <- log10(1 + read_counts_matrix) / max(log10(1 + read_counts_matrix))

# Sort by Days
# The day is the number that follows "Exp1_" in each column name. Reorder the
# columns themselves: assigning the sorted names back with `colnames<-`
# would only shuffle the labels and leave every column mislabelled.
day_of_column <- as.numeric(gsub("Exp1_(\\d+).*", "\\1", colnames(data)))
data <- data[, order(day_of_column), drop = FALSE]

pheatmap(data,
         cluster_cols = FALSE,
         cluster_rows = TRUE,
         #scale = "column",
         #gaps_row = 5, 
         clustering_distance_rows = "euclidean",
         clustering_distance_cols  = "euclidean",
         annotation_colors = annotation_colors, 
         annotation_col = annotation_df,  # Add annotation dataframe here
         show_colnames = TRUE,
         color = colorRampPalette(c(colsHeat))(50),
         border_color = "#f8edeb",
         display_numbers = FALSE)

17 Interactive

18 Plotly

library(plotly)
# Create the main heatmap for your data
p <- plot_ly(
    x = colnames(data),  # Column names as x-axis
    y = rownames(data),   # Row names as y-axis
    z = data,             # Data matrix
    type = "heatmap", 
    colors = colsHeat,
    showscale = TRUE,
    colorbar = list(title = "Abundance")  # Optional colorbar title
) %>%
    layout(
        margin = list(l = 120),  # Space for row names on the left
        xaxis = list(showticklabels = TRUE, ticks = ""),
        # Positions on a categorical axis are counted from 0, so the tick for
        # row i sits at i - 1; 1:nrow(data) would shift every label by one row.
        yaxis = list(tickvals = seq_len(nrow(data)) - 1, ticktext = rownames(data))
    )

p
# Create the compartment annotation heatmap (only DC1, shown in #43978D)
compartment_annotation <- rep(0, ncol(data))  # Only 0s for DC1

p_compartment <- plot_ly(
    x = colnames(data),
    y = "Compartment",  # Annotation row title
    z = matrix(compartment_annotation, nrow = 1),  # Annotation as a single-row matrix
    type = "heatmap",
    colorscale = list(list(0, "#43978D"), list(1, "#43978D")),  # Fixed color for DC1 only
    showscale = FALSE  # No colorbar for annotation
) %>%
    layout(
        xaxis = list(showticklabels = TRUE, ticks = ""),
        yaxis = list(
            tickvals = c(0),       # Position the annotation title
            ticktext = "Compartment"
        )
    )

# Combine the main heatmap and the annotation strip: annotation on top (10% of
# the height), heatmap below, both sharing the x-axis
subplot(p_compartment, p, nrows = 2, heights = c(0.1, 0.9), shareX = TRUE)

19 Different package